Neurodevelopment
1 Clean data
Here’s the data from our neurodevelopment gene panel, which has been cleaned by:
- Removing missing
DCqvalues - Removing genes missing either N or IH
Condition - Removing genes having less than 3 data points in either the N or IH
Condition
2 Model fitting & diagnostics
2.1 Fitting the model
Let’s define the model we will fit to each Gene’s DCq data.
Here, we will fit a simple Linear Model, which is largely similar to running a t-test between both conditions:
\[ \begin{aligned} DCq &\sim N \left(\alpha + \beta_{1}(Condition), \sigma^2 \right) \end{aligned} \]
Now, let’s fit said model to each Gene’s data, for a given Stage and Layer:
compute_fold_change
compute_fold_change <- function(mod) {
return(
get_data(mod)
|> select(Condition, DCq)
|> pivot_wider(names_from = Condition, values_from = DCq, values_fn = mean)
|> summarize(Fold = 2**(-1 * (IH - N)))
|> pull(Fold)
)
}(ND_data$models <- ND_data$clean
|> group_split(Stage, Layer, Gene)
|> map_dfr(
\(d) summarize(d, Mod = pick(Condition, DCq) |> ND_model() |> list(), .by = c(Stage, Layer, Gene)),
.progress = "Fitting models:"
)
|> filter(!has_na_coefs(Mod)) # Removing models that did not fit properly
|> mutate(Fold = map_dbl(Mod, compute_fold_change)) # Adding the Fold change
|> select(Stage, Layer, Gene, Fold, Mod)
)2.2 Model diagnostics
Here are the diagnostic plots for a random sample of the fitted models:
3 Model analysis
For each model we fit, we can then extract the CI and p.value for the relevant contrasts, and use those to establish if a Gene was up or down-regulated:
get_emmeans_data
get_emmeans_data <- function(mod) {
return(
emmeans(mod, specs = "Condition", type = "response")
|> contrast(method = "pairwise", adjust = "none", infer = TRUE)
|> as.data.frame()
|> pivot_wider(names_from = contrast, values_from = estimate)
|> select(last_col(), LCB = lower.CL, UCB = upper.CL, p.value)
|> mutate(across(where(is.character), \(x) na_if(x, "NaN")))
)
}(ND_data$predictions <- ND_data$models
|> group_split(Stage, Layer, Gene)
|> map_dfr(\(d) mutate(d, get_emmeans_data(Mod[[1]])), .progress = "Extracting model predictions:")
|> filter(!is.na(p.value))
|> mutate(Expression = get_regulation_type(Fold, p.value))
|> select(Stage, Layer, Gene, Fold, Expression, matches("-|/"), LCB, UCB, p.value)
)3.1 Gene regulation overview
We can get a general overview of which Gene are up or down-regulated through a Sunburst plot, stacked by Stage and Layer.
make_suburst_plot
make_suburst_plot <- function(dat, layers, tooltips = NULL, colors = NULL, plot_options = list()) {
if (!is.null(colors)) colors <- set_names(colors, \(x) x |> str_replace_all(" ", "\n") |> str_replace_all("and\n", "and "))
.make_sunburst_data <- function(dat, layers_part) {
if (all(layers == layers_part))
sun_dat <- summarize(dat, across(any_of(c(layers_part, tooltips)), first), .by = all_of(layers_part))
else
sun_dat <- summarize(dat, across(any_of(layers_part), first), .by = all_of(layers_part))
return(
bind_cols(
transmute(sun_dat, ids = pick(any_of(layers_part)) |> pmap_chr(\(...) str_c(..., sep = ">>"))),
transmute(
sun_dat,
parents = pick(any_of(layers_part)) |>
select(-last_col()) |>
pmap_chr(\(...) str_c(..., sep = ">>")) |>
bind(x, if (is_empty(x)) "" else x)
),
transmute(sun_dat, labels = pick(any_of(layers_part)) |> select(last_col()) |> pull(1))
)
|> bind(
x,
if (all(layers == layers_part) & !is.null(tooltips))
bind_cols(
x,
transmute(
sun_dat,
hovertext = pmap_chr(
pick(all_of(tooltips)),
\(...) str_c(
names(c(...)),
map_if(list(...), is.numeric, \(x) round(x, 3)) |> map_if(is.factor, as.character),
sep = ": ",
collapse = "\n"
)
)
)
)
else x
)
|> mutate(labels = labels |> str_replace_all(" ", "\n") |> str_replace_all("and\n", "and "))
)
}
params <- map_dfr(1:length(layers), \(x) .make_sunburst_data(dat, layers[1:x])) |>
mutate(bg_color = map_chr(labels, \(x) colors[[x]] %||% NA))
rlang::exec(
plotly::plot_ly,
!!!select(params, -bg_color),
type = "sunburst",
branchvalues = "total",
hoverinfo = "text",
sort = FALSE,
marker = list(colors = ~params$bg_color),
!!!plot_options
)
}ND_data$predictions |>
filter(Expression != regulation_type$NOT_REG) |>
left_join(supplementary_data$gene_data$ND, join_by(Gene)) |>
make_suburst_plot(
layers = c("Stage", "Layer", "Expression", "Gene"),
tooltips = c("Gene", "Pathway", "Pathway_family", "Fold", "p.value"),
colors = sunburst_pcr_colors,
plot_options = list(insidetextorientation = 'radial')
)3.2 Regulation & Pathways
We can extend the previous plot to better visualize to which Pathway the regulated genes are linked to.
make_circlize_plot
make_circlize_plot <- function(dat, resolution = 15000, save_to = NULL) {
stage_list <- read_excel(configs$data$PCR$data_dict, sheet = "Stage")$Name
layer_list <- read_excel(configs$data$PCR$data_dict, sheet = "Layer")$Name |> discard(\(x) x == "Whole")
pathway_families <- read_excel(configs$data$PCR$data_dict, sheet = "Pathway_family")$Name
crossed_layer_order <- (
crossing(Stage = stage_list, Layer = layer_list)
|> mutate(across(c(Stage, Layer), \(x) to_factor(x, configs$data$PCR$data_dict)))
|> arrange(Stage, Layer)
|> unite(c(Stage, Layer), col = "Layer", sep = "_", remove = FALSE)
|> pull(Layer)
)
circlize_data <- (
dat
|> filter(p.value <= .05)
|> select(Stage, Layer, Gene, Pathway, Pathway_family, Fold, p.value, Expression)
|> unite(c(Stage, Layer), col = "Layer", sep = "_", remove = FALSE)
|> mutate(Layer = factor(Layer, levels = crossed_layer_order))
|> arrange(Layer)
|> droplevels()
)
df_chord <- (
circlize_data
|> distinct(Layer, Pathway, Gene)
|> summarize(N_reg = n(), .by = c(Layer, Pathway))
|> mutate(Layer = as.character(Layer))
)
mat_chord <- (
xtabs(
N_reg ~ Layer + Pathway,
data = df_chord |>
mutate(
Layer = factor(Layer, levels = crossed_layer_order),
Pathway = to_factor(Pathway, configs$data$PCR$data_dict)
) |>
arrange(Layer),
drop.unused.levels = TRUE
)
)
df_layer <- (
tibble(Sector = rownames(mat_chord))
|> mutate(N_Connection = rowSums(mat_chord))
|> filter(N_Connection > 0)
)
df_pathway <- (
tibble(Sector = colnames(mat_chord))
|> mutate(N_Connection = colSums(mat_chord))
|> filter(N_Connection > 0)
)
qual_col_pals <- RColorBrewer::brewer.pal.info[RColorBrewer::brewer.pal.info$category == 'qual',]
col_vector <- unlist(mapply(RColorBrewer::brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
df_extra <- (
bind_rows(df_layer, df_pathway)
|> mutate(
Color = col_vector[1:n()],
Layer = ifelse(str_detect(Sector, "P[0-9]+_"), str_remove(Sector, "P[0-9]+_"), NA),
Stage = str_extract(Sector, "P[0-9]+"),
)
|> select(Sector, Layer, Stage, Color, N_Connection)
)
get_genes_barplot <- function(index, theta) {
return(
circlize_data
|> filter(Layer == index)
|> distinct(Gene, Fold, p.value, Expression)
|> mutate(
Orientation = theta,
Label = ifelse(
Orientation < 90 | Orientation > 270,
as.character(str_glue("{Gene} ({round(Fold, 2)}) {gtools::stars.pval(p.value)}")),
as.character(str_glue("{gtools::stars.pval(p.value)} ({round(Fold, 2)}) {Gene}"))
),
Fold = ifelse(
Fold >= 1,
scales::rescale(Fold, to = c(0, 1), from = c(1, max(circlize_data$Fold))),
scales::rescale(Fold, to = c(-1, 0), from = c(min(circlize_data$Fold), 1))
),
Color = ifelse(str_detect(Expression, regulation_type$UPREG), colors_fold[2], colors_fold[1])
)
|> arrange(Gene)
|> select(Fold, Label, Color)
)
}
# Drawing the plot
if (!is.null(save_to)) png(save_to, width = resolution, height = resolution)
circos.clear()
circos.par(cell.padding = c(0, 0, 0, 0), points.overflow.warning = FALSE, start.degree = 180)
colors <- c(
"#03937e","#02ccae","#86e3d5","#b9f0e7",
"#0264cc","#3680cf","#66a0de","#95bce6","#bed1e6",
"#5204b3","#7835cc","#9965db","#ac85de","#d1bbed",
"#76187a", "#b625bb","#e37ae6","#eeb1f0",
"#8c013b", "#b8024e","#f589b6",
"#f02b00","#faa491","#f59002","#ffda8f","#81c700", "#d1eb52", "#01944b", "#78e3a1"
)
## Chord diagram
h0 <- resolution / 220
h1 <- resolution / 180
h2 <- resolution / 90
chord_plot <- circlize::chordDiagram(
as.matrix(mat_chord),
grid.col = colors,
directional = 1,
diffHeight = mm_h(h0*1.1),
target.prop.height = mm_h(h0),
direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow",
# big.gap = 10,
small.gap = 0.15,
transparency = 0.20,
annotationTrack = "grid",
preAllocateTracks = list(
list(
track.height = mm_h(h1)
), # Track 1 (or 3 ?)
list(
track.height = mm_h(h2),
track.margin = c(mm_h(h2), 0)
), # Track 2
list(
track.height = mm_h(h1)
) # Track 3 (or 1 ?)
),
annotationTrackHeight = mm_h(h1), # Track 4
scale = TRUE
)
## Track 4 : Layer/Stage legend
track_chord <- 4
cex_chord <- resolution / 1350
circos.track(
track.index = track_chord,
panel.fun = function(x, y) {
theta <- (circlize(mean(CELL_META$xlim), 1.3)[1, 1] %% 360)[[1]]
facing <- ifelse(theta > 220 && theta < 320, c("outside", "bending.outside"), c("inside", "bending.inside"))
circos.text(
CELL_META$xcenter,
CELL_META$ycenter,
str_remove(CELL_META$sector.index, "P[0-9]+_"),
col = "black",
font = 1,
cex = cex_chord,
adj = c(0.5, 0.5),
facing = facing
)
},
bg.border = NA
)
## Track 3 : Stage & Pathway family legend
track_families <- 3
cex_families <- resolution / 1200
colors.stages <- c("#03937e", "#0264cc", "#5204b3", "#76187a", "#8c013b")
for (stage in stage_list) {
highlight.sector(
sector.index = df_extra |> filter(Stage == stage) |> pull(Sector), # purrr::keep(df_extra$Sector, \(x) str_detect(x, "P4"))
track.index = track_families,
col = colors.stages[which(stage_list == stage)[[1]]],
text = stage,
cex = cex_families,
font = 2,
text.col = "white",
facing = "bending.inside",
niceFacing = FALSE
)
}
colors.pf <- c("#eb4f2d","#83c108", "#f5a402", "#2cc969")
for (pf in pathway_families) {
circlize_data |> filter(Pathway_family == pf) |> distinct(Pathway) |> pull()
highlight.sector(
sector.index = circlize_data |> filter(Pathway_family == pf) |> distinct(Pathway) |> pull(),
track.index = track_families,
col = colors.pf[which(pathway_families == pf)[[1]]],
text = pf,
cex = cex_families,
font = 2,
text.col = "white",
facing = "bending.outside",
niceFacing = TRUE
)
}
## Track 2 : barplot
track_barplot <- 2
cex_barplot <- resolution / 2000
circos.track(
track.index = track_barplot,
panel.fun = \(x, y) {
if (str_detect(CELL_META$sector.index, "P[0-9]+_")) {
circos.segments(
x0 = 0,
x1 = 1,
y0 = 0,
y1 = 0,
lty = "solid"
)
draw.sector(
get.cell.meta.data("cell.start.degree", sector.index = CELL_META$sector.index),
get.cell.meta.data("cell.end.degree", sector.index = CELL_META$sector.index),
rou1 = get.cell.meta.data("cell.top.radius", track.index = track_barplot),
rou2 = get.cell.meta.data("cell.top.radius", track.index = track_barplot + 1) + mm_h(2),
)
theta <- (circlize(mean(CELL_META$xlim), 1.3)[1, 1] %% 360)[[1]]
genes_barplot <- get_genes_barplot(CELL_META$sector.index, theta)
# xpos <- seq(from = 0.1, to = 0.9, length.out = nrow(genes_barplot))
# xamp <- abs(CELL_META$cell.xlim[2] - CELL_META$cell.xlim[1])
xpos <- seq(from = CELL_META$cell.xlim[1] + 0.05 , to = CELL_META$cell.xlim[2] - 0.05 , length.out = nrow(genes_barplot))
facing <- ifelse(theta < 90 || theta > 270, "clockwise", "reverse.clockwise")
adjust <- ifelse(theta < 90 || theta > 270, c(0, 0.5), c(1, 0.5))
if (nrow(genes_barplot) > 0) {
circos.barplot(
pos = xpos,
value = genes_barplot$Fold,
bar_width = 0.025,
col = genes_barplot$Color,
border = genes_barplot$Color
)
circos.text(
x = xpos,
y = get.cell.meta.data("cell.top.radius", track.index = track_barplot) + 0.2,
labels = genes_barplot$Label,
col = genes_barplot$Color,
font = 2,
cex = cex_barplot,
adj = c(adjust),
facing = c(facing)
)
}
}
},
bg.border = NA
)
## Track 1 : Empty (gene labels)
if (!is.null(save_to)) dev.off()
}3.3 Gene regulation timeline
To get a better idea of how each Gene’s regulation changes through time, we can plot a timeline of their expression, split by Layer and Pathway.
make_fold_timeline_plot
make_fold_timeline_plot <- function(
dat, facet_rows = "Pathway", trans = "identity",
color_by = NULL, colors = colors_effect, size_boost = 1
) {
origin <- do.call(trans, list(1))
dat <- (
dat
|> mutate(Fold_trans = do.call(trans, list(Fold)))
|> mutate(Fold_Amp = ifelse(
max(Fold_trans, na.rm = TRUE) - min(Fold_trans, na.rm = TRUE) != 0,
max(Fold_trans, na.rm = TRUE) - min(Fold_trans, na.rm = TRUE),
mean(Fold_trans, na.rm = TRUE)) * 0.1,
.by = all_of(c(facet_rows, "Stage"))
)
)
timeline <- (
ggplot(dat)
+ { if(is.null(color_by)) aes(x = Gene, color = Fold >= 1) else aes(x = Gene, color = .data[[color_by]]) }
+ geom_linerange(aes(ymax = Fold_trans), ymin = origin, size = 2 + (size_boost * 0.5))
+ geom_hline(yintercept = origin, size = 0.3, linetype = "dotted")
+ geom_text(aes(
label = str_c(round(Fold, 2), stars.pval(p.value) |> str_replace(fixed("."), ""), sep = " "),
y = ifelse(Fold_trans > origin, Fold_trans + Fold_Amp, Fold_trans - Fold_Amp),
hjust = ifelse(Fold > 1, 0, 1)
),
vjust = 0.5, angle = 0, size = 2 + (size_boost * 0.25), check_overlap = TRUE
)
+ scale_color_manual(" ", values = colors)
+ scale_y_continuous(breaks = c(0,1,2,3), expand = expansion(mult = 1.01 * (1 + (size_boost/100))))
+ scale_x_discrete(expand = expansion(add = 1 * size_boost), limits = \(x) rev(x))
+ labs(
x = "",
y = ifelse(trans != "identity", str_glue("Fold Change *({trans} scale)*"), "Fold Change")
)
+ coord_flip()
+ facet_grid(
vars(.data[[facet_rows]]), vars(Stage),
scales = "free_y", space = "free_y", labeller = label_wrap_gen(width = 12, multi_line = TRUE)
)
+ { if (!is.null(color_by)) guides(color = guide_legend(title = color_by)) }
+ theme(
legend.position = ifelse(is.null(color_by), "none", "bottom")
, axis.text.x = element_blank()
, axis.title.x = element_markdown(size = 9)
, axis.text.y = element_text(size = 7)
, strip.text = element_text(size = 5 * size_boost)
, plot.title = element_markdown(size = 9, face = "plain", vjust = 1, hjust = 0.5)
)
)
return(timeline)
}render_ND_timeline
render_ND_timeline <- function(dat, group) {
cur_group_name <- get_var_level_name("Layer", first(group[[1]]), "PCR")
plot <- make_fold_timeline_plot(dat, facet_rows = "Pathway_family", trans = "log", colors = colors_fold, size_boost = 1.5)
width <- 2 + n_distinct(dat$Stage)
height <- dat |>
group_by(Pathway_family) |>
group_map(\(d, g) n_distinct(d$Gene) * 0.1 + 1.3) |>
flatten_dbl() |>
sum()
template_md <- c(
'### `r cur_group_name`',
'```{r}',
'#| echo: false',
'#| fig-width: !expr width',
'#| fig-height: !expr height',
'plot',
'```'
)
knitr::knit_child(text = template_md, envir = rlang::env(), quiet = TRUE)
}ND_timelines <- (
ND_data$predictions
|> left_join(supplementary_data$gene_data$ND, join_by(Gene))
|> left_join(supplementary_data$layer_families, join_by(Layer))
|> filter(p.value <= .05)
|> filter(!(Layer_family == "PC" & Stage == "P4")) # PCs are too small and undifferentiated at that stage to properly microdissect
|> select(Stage, Layer, Layer_family, Gene, Fold, p.value, Expression, Pathway, Pathway_family)
|> mutate(Stage = case_when(
Layer %in% c("EGLi", "EGLo") ~ str_glue("{Stage} ({Layer})"),
.default = as.character(Stage)
)
|> factor(levels = c("P4", "P8", "P8 (EGLo)", "P8 (EGLi)", "P12", "P21", "P70"))
)
|> group_by(Layer_family)
|> group_map(render_ND_timeline)
|> unlist()
)